#!/usr/bin/env python

import re
import sys
import urllib2


# Matches the "Free PMC Article" status-icon anchor in a fetched page; the
# capture group is the site-relative href. Compiled once, reused per row.
_FREE_PMC_ANCHOR = re.compile(
    r'<a class="status_icon" href="([^"]+)" ref="log\$=freeicon">'
    r'Free PMC Article</a>')


def _find_free_link(html):
    """Return the absolute URL of the "Free PMC Article" link in *html*.

    When several such anchors are present the last one is used.

    Raises:
        ValueError: if *html* contains no such anchor.
    """
    hrefs = _FREE_PMC_ANCHOR.findall(html)
    if not hrefs:
        raise ValueError('no "Free PMC Article" link found')
    return 'http://www.ncbi.nlm.nih.gov' + hrefs[-1]


def main():
    """Append a free-full-text link column to tab-separated rows from stdin.

    Each input line must hold three tab-separated fields: ``kegg_name``,
    ``ncbi_name`` and ``link``. The page at ``link`` is fetched and searched
    for a "Free PMC Article" anchor. One row is written to stdout per valid
    input row: the three input fields followed by the absolute free-article
    URL, or the literal text ``None`` when the page could not be fetched or
    carries no such anchor.

    Failures for a single row are reported on stderr and never abort the
    run. Blank lines are skipped silently; lines with a different number of
    fields are reported on stderr and skipped.
    """
    for line in sys.stdin:
        fields = line.strip('\n').split('\t')
        if len(fields) != 3:
            # A stray blank line (e.g. at end of input) is not worth a
            # warning; anything else is reported so bad rows are noticed.
            if line.strip():
                sys.stderr.write(
                    'Skipping malformed line (expected 3 tab-separated '
                    'fields): %r\n' % (line,))
            continue
        kegg_name, ncbi_name, link = fields

        try:
            urlobj = urllib2.urlopen(link)
            try:
                # Fold newlines into spaces so an anchor whose attributes
                # are wrapped across lines still matches the pattern.
                html = urlobj.read().replace('\n', ' ')
            finally:
                # Always release the connection, even if read() fails.
                urlobj.close()
            link_free = _find_free_link(html)
        except Exception as e:
            # Best-effort batch job: log the failure and keep going.
            sys.stderr.write(
                'Error occured for "%s": %s\n' % (kegg_name, str(e)))
            link_free = None

        sys.stdout.write(
            '%s\t%s\t%s\t%s\n' % (kegg_name, ncbi_name, link, link_free))

# Run only when executed as a script, so that importing this module does not
# start reading stdin and fetching URLs as a side effect.
if __name__ == '__main__':
    main()

